# Aggregate store x group into store indices 
# JHL 

# Clean environment and initialize
rm(list = ls(all.names = TRUE))

# Packages: install anything that is missing, then attach what the script uses.
list.of.packages <- c("folderfun", "data.table", "bit64", "lubridate", "foreign", "dplyr", "ggplot2")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) {
  install.packages(new.packages, repos = "https://cran.us.r-project.org")
}

# library() stops immediately when a package cannot be attached, whereas
# require() only returns FALSE and lets the script fail later with an
# unrelated-looking "could not find function" error.
library(folderfun)
library(data.table)
library(lubridate)
library(parallel)
library(foreign)

# Setup paths
setff("path_home")
path_home <- ffpath_home()

# Directory holding the price-index scripts, input directory with the stacked
# per-store index files, and the sprintf() template for the per-store output
# files (%s is filled with the store code).
do_path <- sprintf("%s/do/data_prep/01_price_index", path_home)
repository <- sprintf("%s/dta/nielsen/PIq_06_15/storePI_stacked", path_home)
savepath_pi <- paste0(path_home, "/dta/nielsen/PIq_06_15/storePI/PI_%s.csv")

# Source functions
source(sprintf("%s/07_PIq_store_agg_opt_fns.R", do_path))

#' Aggregate store x group price indices into store-level indices.
#'
#' Processes one slice of a SLURM array job. The per-store CSV files found
#' under the global `repository` directory are split into 100 slices by
#' percentile of the store code embedded in the file name (e.g.
#' `PI_1000342.csv` -> 1000342); the slice handled by this process is chosen
#' by the `SLURM_ARRAY_TASK_ID` environment variable (1-100). Each store's
#' table is run through `PI.pre`, `Calc.PI_L`, `Calc.TVC.VUL`,
#' `Calc.Tornqvist` and `Calc.Final` (all defined in the sourced functions
#' file), tagged with `store_code_uc`, and written to the path produced by
#' the global `savepath_pi` template.
#'
#' @param collapse_opt Not used by this function; kept so existing callers
#'   that pass it keep working.
#' @return Invisibly, the value of the last `verbose.Print.Time()` call. The
#'   function is called for its side effect of writing one CSV per store.
store_agg <- function(collapse_opt = FALSE) {

  # Percentile cut points that define the slices; one slice per array task.
  probs <- seq(0, 1, by = 0.01)
  n_tasks <- length(probs) - 1L

  # 1-100, parallelize by quantile of store_code_uc.
  # Fail early with a readable message when the task id is missing or out of
  # range instead of hitting a subscript error further down.
  task_env <- Sys.getenv("SLURM_ARRAY_TASK_ID")
  myID <- suppressWarnings(as.integer(task_env))
  if (is.na(myID) || myID < 1L || myID > n_tasks) {
    stop(
      sprintf(
        "SLURM_ARRAY_TASK_ID must be an integer in 1..%d, got '%s'",
        n_tasks, task_env
      ),
      call. = FALSE
    )
  }

  verbose <- TRUE
  cores <- 8

  prev.time <- verbose.Print.Time(verbose, "List files")

  files <- list.files(repository, pattern = ".\\.csv", recursive = TRUE, full.names = TRUE)

  # Store code = text after the last underscore of the path, up to the first
  # dot. Sample file:
  # '/scratch/midway2/jleung/PIq_06_15/storePI_stacked/PI_1000342.csv'
  code_text <- sub("[.].*$", "", sub("^.*_", "", files))
  file.table <- data.table(file_name = files, store_codes = as.numeric(code_text))

  # Files whose name does not carry a numeric code cannot be assigned to a
  # slice, and NA codes would make quantile() fail.
  file.table <- file.table[!is.na(store_codes)]
  setkey(file.table, store_codes)

  if (nrow(file.table) == 0L) {
    message("No store files found under ", repository, "; nothing to do.")
    return(invisible(prev.time))
  }

  # Quantile over the list of files. Slices are half-open [lower, upper) so a
  # store code that coincides with a cut point is handled by exactly one
  # task; only the last slice also includes its upper bound (the maximum).
  quant <- quantile(unique(file.table$store_codes), probs)
  lower_q <- quant[[myID]]
  upper_q <- quant[[myID + 1L]]
  if (myID < n_tasks) {
    file.table <- file.table[store_codes >= lower_q & store_codes < upper_q]
  } else {
    file.table <- file.table[store_codes >= lower_q & store_codes <= upper_q]
  }

  if (nrow(file.table) == 0L) {
    message("No store files fall into slice ", myID, "; nothing to do.")
    return(invisible(prev.time))
  }

  prev.time <- verbose.Print.Time(verbose, "List locations", prev.time)

  PI_by_store <- lapply(file.table$file_name, fread)
  PI_by_store <- lapply(PI_by_store, data.table)
  # Format codes explicitly: implicit coercion would turn e.g. 1000000 into
  # "1e+06", which would then end up in the output file name.
  names(PI_by_store) <- sprintf("%.0f", file.table$store_codes)

  prev.time <- verbose.Print.Time(verbose, "Indices", prev.time)

  # Keep only product groups without missing values, stores that are always
  # in the panel
  PI_pre <- mclapply(PI_by_store, PI.pre,
                     mc.set.seed = TRUE, mc.cores = cores, mc.preschedule = FALSE)

  PI_L <- mclapply(PI_pre, Calc.PI_L,
                   mc.set.seed = TRUE, mc.cores = cores, mc.preschedule = FALSE)

  # NOTE(review): this result is never used below. It is kept because the
  # side effects of Calc.TVC.VUL are not visible from this file; confirm and
  # drop the stage if it is pure.
  TVC_VUL <- mclapply(PI_pre, Calc.TVC.VUL,
                      mc.set.seed = TRUE, mc.cores = cores, mc.preschedule = FALSE)

  PI_Torn <- mclapply(PI_pre, Calc.Tornqvist,
                      mc.set.seed = TRUE, mc.cores = cores, mc.preschedule = FALSE)

  PI_Final <- mcMap(Calc.Final, PI_L, PI_Torn,
                    mc.set.seed = TRUE, mc.cores = cores, mc.preschedule = FALSE)

  prev.time <- verbose.Print.Time(verbose, "Saving", prev.time)

  store_ids <- names(PI_Final)

  # Create store codes. Tables with no rows are replaced by NULL, so no
  # store_code_uc column is added for them.
  PI_Final <- mclapply(store_ids, function(i) {
    if (nrow(PI_Final[[i]]) > 0) {
      PI_Final[[i]][, store_code_uc := as.numeric(i)]
    } else {
      NULL
    }
  }, mc.set.seed = TRUE, mc.cores = cores)

  # mclapply() over a character vector returns an unnamed list; restore names.
  names(PI_Final) <- store_ids

  # One output file per store, named from the savepath_pi template.
  mclapply(names(PI_Final), function(i) {
    write.csv(PI_Final[[i]], file = sprintf(savepath_pi, i),
              row.names = FALSE)
  }, mc.set.seed = TRUE, mc.cores = cores)

  prev.time <- verbose.Print.Time(verbose, "Done", prev.time)

}

# Run the aggregation for the slice selected by SLURM_ARRAY_TASK_ID.
store_agg()


